| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.190901512444921, | |
| "global_step": 50000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.9999999999999997e-06, | |
| "loss": 1.0412, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.999999999999999e-06, | |
| "loss": 0.835, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.999999999999999e-06, | |
| "loss": 0.7822, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.1999999999999999e-05, | |
| "loss": 0.7718, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.4999999999999999e-05, | |
| "loss": 0.7707, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.7999999999999997e-05, | |
| "loss": 0.7697, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.769, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.3999999999999997e-05, | |
| "loss": 0.7682, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.6999999999999996e-05, | |
| "loss": 0.7674, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "loss": 0.767, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_runtime": 45.7675, | |
| "eval_samples_per_second": 235.975, | |
| "eval_steps_per_second": 7.385, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.2999999999999996e-05, | |
| "loss": 0.7665, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.5999999999999994e-05, | |
| "loss": 0.7662, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.9e-05, | |
| "loss": 0.7661, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.766, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.4999999999999996e-05, | |
| "loss": 0.7659, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.7999999999999994e-05, | |
| "loss": 0.7656, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5.1e-05, | |
| "loss": 0.7655, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5.399999999999999e-05, | |
| "loss": 0.7655, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 5.6999999999999996e-05, | |
| "loss": 0.7653, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 5.9999999999999995e-05, | |
| "loss": 0.7655, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_runtime": 45.5917, | |
| "eval_samples_per_second": 236.885, | |
| "eval_steps_per_second": 7.414, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 6.299999999999999e-05, | |
| "loss": 0.7651, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 6.599999999999999e-05, | |
| "loss": 0.7653, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 6.9e-05, | |
| "loss": 0.7654, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.199999999999999e-05, | |
| "loss": 0.765, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.5e-05, | |
| "loss": 0.7649, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.8e-05, | |
| "loss": 0.7648, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 8.1e-05, | |
| "loss": 0.7647, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.4e-05, | |
| "loss": 0.7645, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.699999999999999e-05, | |
| "loss": 0.7645, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.999999999999999e-05, | |
| "loss": 0.7644, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_runtime": 45.7281, | |
| "eval_samples_per_second": 236.179, | |
| "eval_steps_per_second": 7.392, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.3e-05, | |
| "loss": 0.7641, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.599999999999999e-05, | |
| "loss": 0.764, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.9e-05, | |
| "loss": 0.7638, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000102, | |
| "loss": 0.763, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00010499999999999999, | |
| "loss": 0.7665, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00010799999999999998, | |
| "loss": 0.7669, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00011099999999999999, | |
| "loss": 0.7653, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00011399999999999999, | |
| "loss": 0.7535, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.000117, | |
| "loss": 0.7218, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00011999999999999999, | |
| "loss": 0.6956, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_runtime": 45.9121, | |
| "eval_samples_per_second": 235.232, | |
| "eval_steps_per_second": 7.362, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00012299999999999998, | |
| "loss": 0.6758, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00012599999999999997, | |
| "loss": 0.6557, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000129, | |
| "loss": 0.6402, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00013199999999999998, | |
| "loss": 0.6302, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000135, | |
| "loss": 0.623, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000138, | |
| "loss": 0.6169, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00014099999999999998, | |
| "loss": 0.6121, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00014399999999999998, | |
| "loss": 0.607, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000147, | |
| "loss": 0.6039, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00015, | |
| "loss": 0.6012, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_runtime": 46.0979, | |
| "eval_samples_per_second": 234.284, | |
| "eval_steps_per_second": 7.332, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001499996172456075, | |
| "loss": 0.5981, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00014999846898661572, | |
| "loss": 0.5954, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00014999655523558183, | |
| "loss": 0.5935, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00014999387601343436, | |
| "loss": 0.5911, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00014999043134947282, | |
| "loss": 0.5895, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00014998622128136748, | |
| "loss": 0.5877, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.000149981245855159, | |
| "loss": 0.5866, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00014997550512525784, | |
| "loss": 0.5845, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001499689991544437, | |
| "loss": 0.5784, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00014996172801386482, | |
| "loss": 0.5684, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_runtime": 46.0154, | |
| "eval_samples_per_second": 234.704, | |
| "eval_steps_per_second": 7.345, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00014995369178303722, | |
| "loss": 0.5642, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001499448905498439, | |
| "loss": 0.5625, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00014993532441053364, | |
| "loss": 0.5601, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001499249934697203, | |
| "loss": 0.5581, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001499138978403813, | |
| "loss": 0.554, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00014990203764385677, | |
| "loss": 0.5462, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00014988941300984784, | |
| "loss": 0.5284, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001498760240764155, | |
| "loss": 0.5032, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000149861870989979, | |
| "loss": 0.4751, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001498469539053142, | |
| "loss": 0.4574, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_runtime": 45.9402, | |
| "eval_samples_per_second": 235.088, | |
| "eval_steps_per_second": 7.357, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00014983127298555198, | |
| "loss": 0.4453, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00014981482840217632, | |
| "loss": 0.437, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00014979762033502262, | |
| "loss": 0.4306, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00014977964897227547, | |
| "loss": 0.4254, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00014976091451046687, | |
| "loss": 0.4204, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00014974141715447386, | |
| "loss": 0.4178, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00014972115711751644, | |
| "loss": 0.4135, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00014970013462115505, | |
| "loss": 0.4099, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00014967834989528843, | |
| "loss": 0.4077, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00014965580317815078, | |
| "loss": 0.405, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_runtime": 45.7648, | |
| "eval_samples_per_second": 235.989, | |
| "eval_steps_per_second": 7.386, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00014963249471630944, | |
| "loss": 0.4017, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.000149608424764662, | |
| "loss": 0.4006, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001495835935864336, | |
| "loss": 0.3977, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00014955800145317397, | |
| "loss": 0.3964, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00014953164864475466, | |
| "loss": 0.3949, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001495045354493657, | |
| "loss": 0.3961, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00014947666216351272, | |
| "loss": 0.398, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00014944802909201344, | |
| "loss": 0.3924, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00014941863654799456, | |
| "loss": 0.3938, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00014938848485288825, | |
| "loss": 0.3885, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_runtime": 45.9868, | |
| "eval_samples_per_second": 234.85, | |
| "eval_steps_per_second": 7.35, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001493575743364286, | |
| "loss": 0.391, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00014932590533664808, | |
| "loss": 0.3884, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001492934781998738, | |
| "loss": 0.3856, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001492602932807237, | |
| "loss": 0.3843, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00014922635094210277, | |
| "loss": 0.3848, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.000149191651555199, | |
| "loss": 0.3795, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001491561954994793, | |
| "loss": 0.3735, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00014911998316268537, | |
| "loss": 0.3658, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00014908301494082963, | |
| "loss": 0.362, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00014904529123819054, | |
| "loss": 0.3595, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_runtime": 46.3224, | |
| "eval_samples_per_second": 233.148, | |
| "eval_steps_per_second": 7.297, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00014900681246730852, | |
| "loss": 0.3585, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00014896757904898125, | |
| "loss": 0.3578, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014892759141225904, | |
| "loss": 0.3568, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014888684999444035, | |
| "loss": 0.355, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014884535524106675, | |
| "loss": 0.3537, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014880310760591824, | |
| "loss": 0.3523, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001487601075510082, | |
| "loss": 0.3524, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001487163555465783, | |
| "loss": 0.3515, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001486718520710935, | |
| "loss": 0.3508, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00014862659761123663, | |
| "loss": 0.3493, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_runtime": 46.1625, | |
| "eval_samples_per_second": 233.956, | |
| "eval_steps_per_second": 7.322, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00014858059266190327, | |
| "loss": 0.3472, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00014853383772619612, | |
| "loss": 0.3463, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00014848633331541967, | |
| "loss": 0.3363, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0001484380799490746, | |
| "loss": 0.3265, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00014838907815485194, | |
| "loss": 0.3235, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00014833932846862748, | |
| "loss": 0.3218, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00014828883143445582, | |
| "loss": 0.3203, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001482375876045644, | |
| "loss": 0.3204, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001481855975393476, | |
| "loss": 0.3184, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001481328618073604, | |
| "loss": 0.318, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_runtime": 46.1354, | |
| "eval_samples_per_second": 234.094, | |
| "eval_steps_per_second": 7.326, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001480793809853123, | |
| "loss": 0.3163, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00014802515565806107, | |
| "loss": 0.3155, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00014797018641860612, | |
| "loss": 0.314, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0001479144738680823, | |
| "loss": 0.3136, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014785801861575312, | |
| "loss": 0.3117, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014780082127900416, | |
| "loss": 0.3086, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014774288248333635, | |
| "loss": 0.3074, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014768420286235908, | |
| "loss": 0.3074, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00014762478305778328, | |
| "loss": 0.3064, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001475646237194144, | |
| "loss": 0.3057, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_runtime": 46.1242, | |
| "eval_samples_per_second": 234.15, | |
| "eval_steps_per_second": 7.328, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00014750372550514533, | |
| "loss": 0.3048, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001474420890809492, | |
| "loss": 0.3037, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014737971512087202, | |
| "loss": 0.3029, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014731660430702552, | |
| "loss": 0.3024, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014725275732957937, | |
| "loss": 0.3011, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014718817488675387, | |
| "loss": 0.3006, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014712285768481235, | |
| "loss": 0.3009, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014705680643805323, | |
| "loss": 0.2991, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014699002186880232, | |
| "loss": 0.2991, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014692250470740503, | |
| "loss": 0.2979, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_runtime": 46.2531, | |
| "eval_samples_per_second": 233.498, | |
| "eval_steps_per_second": 7.308, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00014685425569221819, | |
| "loss": 0.2975, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00014678527556960207, | |
| "loss": 0.2955, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001467155650939123, | |
| "loss": 0.295, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00014664512502749141, | |
| "loss": 0.2941, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00014657395614066075, | |
| "loss": 0.2931, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001465020592117118, | |
| "loss": 0.2921, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001464294350268979, | |
| "loss": 0.2918, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00014635608438042546, | |
| "loss": 0.2907, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00014628200807444543, | |
| "loss": 0.2899, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001462072069190444, | |
| "loss": 0.2898, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_runtime": 46.2774, | |
| "eval_samples_per_second": 233.375, | |
| "eval_steps_per_second": 7.304, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00014613168173223585, | |
| "loss": 0.2885, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00014605543333995113, | |
| "loss": 0.288, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00014597846257603038, | |
| "loss": 0.2875, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001459007702822136, | |
| "loss": 0.2876, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00014582235730813128, | |
| "loss": 0.2862, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00014574322451129507, | |
| "loss": 0.2849, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00014566337275708863, | |
| "loss": 0.2852, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001455828029187579, | |
| "loss": 0.2833, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00014550151587740178, | |
| "loss": 0.2836, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00014541951252196225, | |
| "loss": 0.2817, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_runtime": 46.1169, | |
| "eval_samples_per_second": 234.187, | |
| "eval_steps_per_second": 7.329, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00014533679374921493, | |
| "loss": 0.2824, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014525336046375905, | |
| "loss": 0.2817, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014516921357800766, | |
| "loss": 0.2812, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014508435401217759, | |
| "loss": 0.2812, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014499878269427948, | |
| "loss": 0.2795, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014491250056010758, | |
| "loss": 0.2788, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014482550855322943, | |
| "loss": 0.2775, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001447378076249757, | |
| "loss": 0.2773, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014464939873442973, | |
| "loss": 0.2769, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014456028284841693, | |
| "loss": 0.2765, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_runtime": 46.3516, | |
| "eval_samples_per_second": 233.002, | |
| "eval_steps_per_second": 7.292, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014447046094149437, | |
| "loss": 0.2752, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014437993399594003, | |
| "loss": 0.2765, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001442887030017421, | |
| "loss": 0.2752, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014419676895658807, | |
| "loss": 0.2748, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.000144104132865854, | |
| "loss": 0.2739, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001440107957425933, | |
| "loss": 0.2729, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001439167586075258, | |
| "loss": 0.2722, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001438220224890265, | |
| "loss": 0.2725, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014372658842311449, | |
| "loss": 0.2726, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014363045745344137, | |
| "loss": 0.2715, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_runtime": 46.2247, | |
| "eval_samples_per_second": 233.641, | |
| "eval_steps_per_second": 7.312, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014353363063128005, | |
| "loss": 0.2705, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001434361090155131, | |
| "loss": 0.2706, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014333789367262136, | |
| "loss": 0.2701, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014323898567667202, | |
| "loss": 0.2693, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014313938610930712, | |
| "loss": 0.2693, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014303909605973154, | |
| "loss": 0.2691, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001429381166247012, | |
| "loss": 0.2681, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014283644890851103, | |
| "loss": 0.2672, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014273409402298291, | |
| "loss": 0.2671, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014263105308745343, | |
| "loss": 0.2676, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_runtime": 46.3331, | |
| "eval_samples_per_second": 233.095, | |
| "eval_steps_per_second": 7.295, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014252732722876176, | |
| "loss": 0.2654, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001424229175812373, | |
| "loss": 0.2649, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00014231782528668717, | |
| "loss": 0.2647, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00014221205149438394, | |
| "loss": 0.2649, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001421055973610528, | |
| "loss": 0.264, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014199846405085913, | |
| "loss": 0.2647, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014189065273539564, | |
| "loss": 0.2635, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014178216459366958, | |
| "loss": 0.2623, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014167300081208988, | |
| "loss": 0.2627, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00014156316258445421, | |
| "loss": 0.2932, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_runtime": 46.169, | |
| "eval_samples_per_second": 233.923, | |
| "eval_steps_per_second": 7.321, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00014145265111193583, | |
| "loss": 0.2645, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00014134146760307043, | |
| "loss": 0.2625, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00014122961327374313, | |
| "loss": 0.2615, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001411170893471749, | |
| "loss": 0.2605, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00014100389705390938, | |
| "loss": 0.26, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001408900376317994, | |
| "loss": 0.2583, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001407755123259933, | |
| "loss": 0.258, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014066032238892152, | |
| "loss": 0.2569, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014054446908028272, | |
| "loss": 0.2568, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014042795366703018, | |
| "loss": 0.2563, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_runtime": 46.2726, | |
| "eval_samples_per_second": 233.4, | |
| "eval_steps_per_second": 7.305, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001403107774233577, | |
| "loss": 0.256, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014019294163068597, | |
| "loss": 0.2548, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014007444757764835, | |
| "loss": 0.2543, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001399552965600768, | |
| "loss": 0.2537, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001398354898809877, | |
| "loss": 0.2531, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001397150288505678, | |
| "loss": 0.2531, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00013959391478615959, | |
| "loss": 0.2526, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00013947214901224706, | |
| "loss": 0.2522, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001393497328604412, | |
| "loss": 0.2515, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00013922666766946545, | |
| "loss": 0.2513, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_runtime": 46.224, | |
| "eval_samples_per_second": 233.645, | |
| "eval_steps_per_second": 7.312, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00013910295478514106, | |
| "loss": 0.2504, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0001389785955603722, | |
| "loss": 0.2503, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00013885359135513154, | |
| "loss": 0.2501, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.000138727943536445, | |
| "loss": 0.2488, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013860165347837698, | |
| "loss": 0.2492, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013847472256201535, | |
| "loss": 0.2483, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013834715217545625, | |
| "loss": 0.248, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.000138218943713789, | |
| "loss": 0.2479, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001380900985790808, | |
| "loss": 0.2485, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013796061818036138, | |
| "loss": 0.2467, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_runtime": 46.1546, | |
| "eval_samples_per_second": 233.996, | |
| "eval_steps_per_second": 7.323, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013783050393360768, | |
| "loss": 0.2468, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001376997572617282, | |
| "loss": 0.2463, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013756837959454766, | |
| "loss": 0.2456, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001374363723687911, | |
| "loss": 0.2459, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013730373702806846, | |
| "loss": 0.2447, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013717047502285855, | |
| "loss": 0.245, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001370365878104933, | |
| "loss": 0.2446, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00013690207685514185, | |
| "loss": 0.2442, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001367669436277944, | |
| "loss": 0.2439, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001366311896062463, | |
| "loss": 0.2438, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_runtime": 46.5558, | |
| "eval_samples_per_second": 231.98, | |
| "eval_steps_per_second": 7.26, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00013649481627508181, | |
| "loss": 0.2436, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0001363578251256578, | |
| "loss": 0.2429, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00013622021765608754, | |
| "loss": 0.2424, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00013608199537122425, | |
| "loss": 0.242, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0001359431597826447, | |
| "loss": 0.2422, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001358037124086327, | |
| "loss": 0.2418, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00013566365477416233, | |
| "loss": 0.2407, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00013552298841088144, | |
| "loss": 0.2416, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00013538171485709486, | |
| "loss": 0.2411, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013523983565774753, | |
| "loss": 0.2401, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_runtime": 46.0773, | |
| "eval_samples_per_second": 234.389, | |
| "eval_steps_per_second": 7.336, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013509735236440766, | |
| "loss": 0.2401, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013495426653524972, | |
| "loss": 0.2402, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013481057973503742, | |
| "loss": 0.24, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013466629353510651, | |
| "loss": 0.239, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013452140951334787, | |
| "loss": 0.239, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013437592925418985, | |
| "loss": 0.2388, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013422985434858133, | |
| "loss": 0.238, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013408318639397405, | |
| "loss": 0.2387, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00013393592699430525, | |
| "loss": 0.2372, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00013378807775998012, | |
| "loss": 0.2377, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_runtime": 46.2501, | |
| "eval_samples_per_second": 233.513, | |
| "eval_steps_per_second": 7.308, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00013363964030785422, | |
| "loss": 0.2373, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00013349061626121578, | |
| "loss": 0.238, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00013334100724976783, | |
| "loss": 0.2367, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0001331908149096106, | |
| "loss": 0.2367, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00013304004088322342, | |
| "loss": 0.2356, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00013288868681944692, | |
| "loss": 0.2365, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013273675437346487, | |
| "loss": 0.236, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013258424520678618, | |
| "loss": 0.2356, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013243116098722663, | |
| "loss": 0.2363, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013227750338889077, | |
| "loss": 0.2345, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_runtime": 46.2738, | |
| "eval_samples_per_second": 233.394, | |
| "eval_steps_per_second": 7.304, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00013212327409215343, | |
| "loss": 0.2351, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001319684747836415, | |
| "loss": 0.2351, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001318131071562154, | |
| "loss": 0.2342, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00013165717290895067, | |
| "loss": 0.2338, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001315006737471192, | |
| "loss": 0.234, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001313436113821708, | |
| "loss": 0.233, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00013118598753171425, | |
| "loss": 0.2331, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001310278039194988, | |
| "loss": 0.2329, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00013086906227539506, | |
| "loss": 0.2332, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013070976433537623, | |
| "loss": 0.2338, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_runtime": 46.2625, | |
| "eval_samples_per_second": 233.45, | |
| "eval_steps_per_second": 7.306, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013054991184149905, | |
| "loss": 0.2325, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013038950654188476, | |
| "loss": 0.2312, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013022855019070005, | |
| "loss": 0.2323, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001300670445481378, | |
| "loss": 0.2319, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001299049913803978, | |
| "loss": 0.2324, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00012974239245966754, | |
| "loss": 0.2313, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001295792495641028, | |
| "loss": 0.2318, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00012941556447780813, | |
| "loss": 0.2309, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0001292513389908174, | |
| "loss": 0.231, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0001290865748990742, | |
| "loss": 0.2298, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_runtime": 46.1555, | |
| "eval_samples_per_second": 233.992, | |
| "eval_steps_per_second": 7.323, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00012892127400441228, | |
| "loss": 0.2302, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00012875543811453576, | |
| "loss": 0.2305, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001285890690429993, | |
| "loss": 0.2293, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00012842216860918846, | |
| "loss": 0.2298, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001282547386382996, | |
| "loss": 0.2296, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001280867809613201, | |
| "loss": 0.2291, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001279182974150082, | |
| "loss": 0.2279, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00012774928984187297, | |
| "loss": 0.2278, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00012757976009015413, | |
| "loss": 0.228, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001274097100138019, | |
| "loss": 0.2282, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_runtime": 46.6895, | |
| "eval_samples_per_second": 231.315, | |
| "eval_steps_per_second": 7.239, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012723914147245663, | |
| "loss": 0.2276, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012706805633142863, | |
| "loss": 0.2276, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012689645646167755, | |
| "loss": 0.2281, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012672434373979207, | |
| "loss": 0.2265, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012655172004796936, | |
| "loss": 0.2286, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012637858727399448, | |
| "loss": 0.227, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012620494731121966, | |
| "loss": 0.2267, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012603080205854372, | |
| "loss": 0.2266, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00012585615342039126, | |
| "loss": 0.2258, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001256810033066918, | |
| "loss": 0.226, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_runtime": 47.0689, | |
| "eval_samples_per_second": 229.451, | |
| "eval_steps_per_second": 7.181, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001255053536328589, | |
| "loss": 0.2257, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001253292063197693, | |
| "loss": 0.2256, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0001251525632937418, | |
| "loss": 0.2257, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00012497542648651615, | |
| "loss": 0.2248, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00012479779783523216, | |
| "loss": 0.225, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00012461967928240828, | |
| "loss": 0.2246, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00012444107277592047, | |
| "loss": 0.2247, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0001242619802689809, | |
| "loss": 0.2246, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00012408240372011647, | |
| "loss": 0.2238, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0001239023450931476, | |
| "loss": 0.2243, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_runtime": 47.1954, | |
| "eval_samples_per_second": 228.836, | |
| "eval_steps_per_second": 7.162, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00012372180635716656, | |
| "loss": 0.2235, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012354078948651604, | |
| "loss": 0.2239, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012335929646076758, | |
| "loss": 0.2231, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012317732926469976, | |
| "loss": 0.2225, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012299488988827675, | |
| "loss": 0.2233, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0001228119803266263, | |
| "loss": 0.223, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0001226286025800181, | |
| "loss": 0.2229, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00012244475865384177, | |
| "loss": 0.222, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00012226045055858505, | |
| "loss": 0.2217, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00012207568030981174, | |
| "loss": 0.2222, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_runtime": 47.0101, | |
| "eval_samples_per_second": 229.738, | |
| "eval_steps_per_second": 7.19, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00012189044992813972, | |
| "loss": 0.2213, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001217047614392187, | |
| "loss": 0.2206, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00012151861687370828, | |
| "loss": 0.2221, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012133201826725558, | |
| "loss": 0.2209, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0001211449676604731, | |
| "loss": 0.2211, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012095746709891632, | |
| "loss": 0.2205, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012076951863306127, | |
| "loss": 0.2203, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0001205811243182823, | |
| "loss": 0.22, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012039228621482949, | |
| "loss": 0.2192, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012020300638780604, | |
| "loss": 0.219, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_runtime": 47.0946, | |
| "eval_samples_per_second": 229.325, | |
| "eval_steps_per_second": 7.177, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012001328690714582, | |
| "loss": 0.2194, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00011982312984759068, | |
| "loss": 0.2194, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011963253728866778, | |
| "loss": 0.2189, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011944151131466675, | |
| "loss": 0.219, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011925005401461709, | |
| "loss": 0.2184, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011905816748226513, | |
| "loss": 0.2182, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011886585381605125, | |
| "loss": 0.2188, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011867311511908693, | |
| "loss": 0.2179, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011847995349913162, | |
| "loss": 0.218, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011828637106856989, | |
| "loss": 0.2173, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_runtime": 46.7598, | |
| "eval_samples_per_second": 230.968, | |
| "eval_steps_per_second": 7.228, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011809236994438816, | |
| "loss": 0.2171, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011789795224815164, | |
| "loss": 0.2175, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011770312010598116, | |
| "loss": 0.2167, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011750787564852973, | |
| "loss": 0.2167, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011731222101095955, | |
| "loss": 0.2171, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011711615833291833, | |
| "loss": 0.2161, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0001169196897585161, | |
| "loss": 0.2168, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011672281743630175, | |
| "loss": 0.2162, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0001165255435192394, | |
| "loss": 0.2152, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011632787016468506, | |
| "loss": 0.216, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_runtime": 47.0992, | |
| "eval_samples_per_second": 229.303, | |
| "eval_steps_per_second": 7.176, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0001161297995343628, | |
| "loss": 0.2157, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011593133379434138, | |
| "loss": 0.215, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011573247511501028, | |
| "loss": 0.2154, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011553322567105619, | |
| "loss": 0.2155, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011533358764143905, | |
| "loss": 0.2149, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011513356320936841, | |
| "loss": 0.2144, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011493315456227943, | |
| "loss": 0.2147, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011473236389180894, | |
| "loss": 0.2145, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011453119339377154, | |
| "loss": 0.2146, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011432964526813558, | |
| "loss": 0.2145, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_runtime": 46.8321, | |
| "eval_samples_per_second": 230.611, | |
| "eval_steps_per_second": 7.217, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011412772171899904, | |
| "loss": 0.2132, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011392542495456556, | |
| "loss": 0.2133, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011372275718712006, | |
| "loss": 0.2125, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011351972063300484, | |
| "loss": 0.2135, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011331631751259515, | |
| "loss": 0.213, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011311255005027487, | |
| "loss": 0.2132, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011290842047441232, | |
| "loss": 0.2125, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011270393101733585, | |
| "loss": 0.2122, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011249908391530946, | |
| "loss": 0.2113, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011229388140850814, | |
| "loss": 0.2119, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_runtime": 46.8036, | |
| "eval_samples_per_second": 230.751, | |
| "eval_steps_per_second": 7.222, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011208832574099368, | |
| "loss": 0.2113, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011188241916068993, | |
| "loss": 0.2111, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011167616391935826, | |
| "loss": 0.2111, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011146956227257293, | |
| "loss": 0.2119, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00011126261647969645, | |
| "loss": 0.2115, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00011105532880385487, | |
| "loss": 0.2104, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00011084770151191299, | |
| "loss": 0.2107, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00011063973687444962, | |
| "loss": 0.2097, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00011043143716573272, | |
| "loss": 0.2107, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00011022280466369448, | |
| "loss": 0.2113, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_runtime": 47.0898, | |
| "eval_samples_per_second": 229.349, | |
| "eval_steps_per_second": 7.178, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00011001384164990662, | |
| "loss": 0.2099, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010980455040955506, | |
| "loss": 0.21, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010959493323141538, | |
| "loss": 0.2091, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010938499240782739, | |
| "loss": 0.2098, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010917473023467032, | |
| "loss": 0.2096, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010896414901133761, | |
| "loss": 0.2085, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010875325104071177, | |
| "loss": 0.2093, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010854203862913927, | |
| "loss": 0.2084, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010833051408640509, | |
| "loss": 0.2083, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010811867972570786, | |
| "loss": 0.2084, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_runtime": 46.8854, | |
| "eval_samples_per_second": 230.349, | |
| "eval_steps_per_second": 7.209, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010790653786363416, | |
| "loss": 0.2082, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010769409082013337, | |
| "loss": 0.2081, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010748134091849238, | |
| "loss": 0.2077, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010726829048531, | |
| "loss": 0.2078, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010705494185047165, | |
| "loss": 0.2077, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001068412973471238, | |
| "loss": 0.2073, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010662735931164853, | |
| "loss": 0.2076, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001064131300836379, | |
| "loss": 0.2069, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001061986120058684, | |
| "loss": 0.2067, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010598380742427543, | |
| "loss": 0.206, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_runtime": 46.6481, | |
| "eval_samples_per_second": 231.521, | |
| "eval_steps_per_second": 7.246, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010576871868792746, | |
| "loss": 0.206, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0001055533481490004, | |
| "loss": 0.2058, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.000105337698162752, | |
| "loss": 0.206, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010512177108749594, | |
| "loss": 0.2057, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010490556928457616, | |
| "loss": 0.2039, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010468909511834088, | |
| "loss": 0.205, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010447235095611692, | |
| "loss": 0.2045, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010425533916818376, | |
| "loss": 0.2047, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010403806212774747, | |
| "loss": 0.205, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000103820522210915, | |
| "loss": 0.2042, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_runtime": 46.7967, | |
| "eval_samples_per_second": 230.786, | |
| "eval_steps_per_second": 7.223, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010360272179666802, | |
| "loss": 0.204, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010338466326683697, | |
| "loss": 0.2037, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010316634900607497, | |
| "loss": 0.2033, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010294778140183182, | |
| "loss": 0.2035, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010272896284432785, | |
| "loss": 0.2037, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010250989572652766, | |
| "loss": 0.2028, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00010229058244411427, | |
| "loss": 0.2019, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00010207102539546251, | |
| "loss": 0.2032, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00010185122698161311, | |
| "loss": 0.2026, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00010163118960624632, | |
| "loss": 0.2024, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_runtime": 46.9319, | |
| "eval_samples_per_second": 230.121, | |
| "eval_steps_per_second": 7.202, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00010141091567565561, | |
| "loss": 0.2028, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00010119040759872142, | |
| "loss": 0.2018, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00010096966778688472, | |
| "loss": 0.2016, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00010074869865412074, | |
| "loss": 0.2024, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00010052750261691254, | |
| "loss": 0.2017, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0001003060820942245, | |
| "loss": 0.2015, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00010008443950747599, | |
| "loss": 0.2014, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 9.986257728051483e-05, | |
| "loss": 0.2014, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.964049783959082e-05, | |
| "loss": 0.2012, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.94182036133291e-05, | |
| "loss": 0.201, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_runtime": 47.2136, | |
| "eval_samples_per_second": 228.748, | |
| "eval_steps_per_second": 7.159, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.919569703270376e-05, | |
| "loss": 0.1998, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.89729805310111e-05, | |
| "loss": 0.2004, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.875005654384307e-05, | |
| "loss": 0.2009, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.852692750906071e-05, | |
| "loss": 0.1999, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.830359586676737e-05, | |
| "loss": 0.1997, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.808006405928215e-05, | |
| "loss": 0.2006, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.785633453111306e-05, | |
| "loss": 0.1999, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 9.763240972893037e-05, | |
| "loss": 0.1992, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 9.740829210153984e-05, | |
| "loss": 0.1991, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 9.718398409985593e-05, | |
| "loss": 0.199, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_runtime": 46.9221, | |
| "eval_samples_per_second": 230.169, | |
| "eval_steps_per_second": 7.203, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 9.695948817687504e-05, | |
| "loss": 0.1987, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.673480678764858e-05, | |
| "loss": 0.1982, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.650994238925626e-05, | |
| "loss": 0.1989, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.628489744077911e-05, | |
| "loss": 0.1985, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.60596744032726e-05, | |
| "loss": 0.1981, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.583427573973982e-05, | |
| "loss": 0.1976, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.560870391510441e-05, | |
| "loss": 0.1981, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.538296139618371e-05, | |
| "loss": 0.1978, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.515705065166178e-05, | |
| "loss": 0.1977, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.493097415206228e-05, | |
| "loss": 0.1974, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_runtime": 47.1161, | |
| "eval_samples_per_second": 229.221, | |
| "eval_steps_per_second": 7.174, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.47047343697216e-05, | |
| "loss": 0.1978, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.447833377876176e-05, | |
| "loss": 0.1974, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.425177485506336e-05, | |
| "loss": 0.1971, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.402506007623848e-05, | |
| "loss": 0.1968, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.379819192160362e-05, | |
| "loss": 0.1969, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.357117287215258e-05, | |
| "loss": 0.1966, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.334400541052928e-05, | |
| "loss": 0.1971, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.311669202100073e-05, | |
| "loss": 0.1962, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.288923518942968e-05, | |
| "loss": 0.1959, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.26616374032477e-05, | |
| "loss": 0.1964, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_runtime": 46.7963, | |
| "eval_samples_per_second": 230.788, | |
| "eval_steps_per_second": 7.223, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.243390115142761e-05, | |
| "loss": 0.196, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.220602892445661e-05, | |
| "loss": 0.1955, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.197802321430889e-05, | |
| "loss": 0.1958, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.174988651441833e-05, | |
| "loss": 0.1951, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.152162131965137e-05, | |
| "loss": 0.1954, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.129323012627956e-05, | |
| "loss": 0.1948, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 9.106471543195244e-05, | |
| "loss": 0.1954, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 9.08360797356701e-05, | |
| "loss": 0.1953, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 9.060732553775582e-05, | |
| "loss": 0.1949, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 9.037845533982892e-05, | |
| "loss": 0.1947, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_runtime": 46.9646, | |
| "eval_samples_per_second": 229.96, | |
| "eval_steps_per_second": 7.197, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 9.014947164477721e-05, | |
| "loss": 0.1946, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 8.992037695672967e-05, | |
| "loss": 0.1938, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 8.969117378102912e-05, | |
| "loss": 0.1946, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 8.946186462420478e-05, | |
| "loss": 0.1942, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.923245199394482e-05, | |
| "loss": 0.1934, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.900293839906903e-05, | |
| "loss": 0.194, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.87733263495013e-05, | |
| "loss": 0.1936, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.85436183562422e-05, | |
| "loss": 0.1933, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.83138169313416e-05, | |
| "loss": 0.1933, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.808392458787103e-05, | |
| "loss": 0.1931, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_runtime": 46.9712, | |
| "eval_samples_per_second": 229.928, | |
| "eval_steps_per_second": 7.196, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.78539438398963e-05, | |
| "loss": 0.1922, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.762387720245008e-05, | |
| "loss": 0.1922, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.73937271915042e-05, | |
| "loss": 0.1926, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.716349632394235e-05, | |
| "loss": 0.1924, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.69331871175324e-05, | |
| "loss": 0.1927, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.67028020908989e-05, | |
| "loss": 0.1924, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.647234376349565e-05, | |
| "loss": 0.1921, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.624181465557794e-05, | |
| "loss": 0.1914, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.601121728817519e-05, | |
| "loss": 0.1917, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.578055418306327e-05, | |
| "loss": 0.1918, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_runtime": 47.0452, | |
| "eval_samples_per_second": 229.566, | |
| "eval_steps_per_second": 7.185, | |
| "step": 50000 | |
| } | |
| ], | |
| "max_steps": 100000, | |
| "num_train_epochs": 3, | |
| "total_flos": 3.504974211922538e+21, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |