| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 1960, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04081632653061224, | |
| "grad_norm": 9.978917121887207, | |
| "learning_rate": 9.948979591836737e-06, | |
| "loss": 0.2617, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 10, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.08163265306122448, | |
| "grad_norm": 7.48874044418335, | |
| "learning_rate": 9.89795918367347e-06, | |
| "loss": 0.1563, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 20, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.12244897959183673, | |
| "grad_norm": 5.081777572631836, | |
| "learning_rate": 9.846938775510205e-06, | |
| "loss": 0.1254, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 30, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.16326530612244897, | |
| "grad_norm": 4.443576812744141, | |
| "learning_rate": 9.795918367346939e-06, | |
| "loss": 0.1113, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 40, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 4.356841087341309, | |
| "learning_rate": 9.744897959183674e-06, | |
| "loss": 0.1257, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 50, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.24489795918367346, | |
| "grad_norm": 1.98320472240448, | |
| "learning_rate": 9.693877551020408e-06, | |
| "loss": 0.0819, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 60, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 3.809190273284912, | |
| "learning_rate": 9.642857142857144e-06, | |
| "loss": 0.1032, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 70, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.32653061224489793, | |
| "grad_norm": 1.645442247390747, | |
| "learning_rate": 9.591836734693878e-06, | |
| "loss": 0.1124, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 80, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.3673469387755102, | |
| "grad_norm": 3.7085306644439697, | |
| "learning_rate": 9.540816326530612e-06, | |
| "loss": 0.0847, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 90, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 3.9240212440490723, | |
| "learning_rate": 9.489795918367348e-06, | |
| "loss": 0.0753, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 100, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.4489795918367347, | |
| "grad_norm": 3.737152338027954, | |
| "learning_rate": 9.438775510204082e-06, | |
| "loss": 0.0723, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 110, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.4897959183673469, | |
| "grad_norm": 2.308751344680786, | |
| "learning_rate": 9.387755102040818e-06, | |
| "loss": 0.0699, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 120, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.5306122448979592, | |
| "grad_norm": 2.3706369400024414, | |
| "learning_rate": 9.336734693877552e-06, | |
| "loss": 0.0589, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 130, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.968673050403595, | |
| "learning_rate": 9.285714285714288e-06, | |
| "loss": 0.0503, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 140, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 1.9251790046691895, | |
| "learning_rate": 9.234693877551022e-06, | |
| "loss": 0.0628, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 150, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.6530612244897959, | |
| "grad_norm": 1.7473604679107666, | |
| "learning_rate": 9.183673469387756e-06, | |
| "loss": 0.0708, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 160, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.6938775510204082, | |
| "grad_norm": 1.9279741048812866, | |
| "learning_rate": 9.13265306122449e-06, | |
| "loss": 0.075, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 170, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.7346938775510204, | |
| "grad_norm": 1.4570097923278809, | |
| "learning_rate": 9.081632653061225e-06, | |
| "loss": 0.0614, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 180, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.7755102040816326, | |
| "grad_norm": 2.014692544937134, | |
| "learning_rate": 9.03061224489796e-06, | |
| "loss": 0.058, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 190, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 1.7634117603302002, | |
| "learning_rate": 8.979591836734695e-06, | |
| "loss": 0.0629, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 200, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.7091555595397949, | |
| "learning_rate": 8.92857142857143e-06, | |
| "loss": 0.0637, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 210, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.8979591836734694, | |
| "grad_norm": 1.073096752166748, | |
| "learning_rate": 8.877551020408163e-06, | |
| "loss": 0.0603, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 220, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.9387755102040817, | |
| "grad_norm": 0.7938856482505798, | |
| "learning_rate": 8.826530612244899e-06, | |
| "loss": 0.0538, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 230, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 0.9795918367346939, | |
| "grad_norm": 1.192353367805481, | |
| "learning_rate": 8.775510204081633e-06, | |
| "loss": 0.0493, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 240, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.0204081632653061, | |
| "grad_norm": 0.9369480013847351, | |
| "learning_rate": 8.724489795918369e-06, | |
| "loss": 0.0595, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 250, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.0612244897959184, | |
| "grad_norm": 1.2866365909576416, | |
| "learning_rate": 8.673469387755103e-06, | |
| "loss": 0.0532, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 260, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.1020408163265305, | |
| "grad_norm": 16.09465980529785, | |
| "learning_rate": 8.622448979591837e-06, | |
| "loss": 0.0663, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 270, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 2.3071987628936768, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.0633, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 280, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.183673469387755, | |
| "grad_norm": 1.2905592918395996, | |
| "learning_rate": 8.520408163265307e-06, | |
| "loss": 0.0498, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 290, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.2244897959183674, | |
| "grad_norm": 1.4856091737747192, | |
| "learning_rate": 8.469387755102042e-06, | |
| "loss": 0.0639, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 300, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.2653061224489797, | |
| "grad_norm": 0.5537325739860535, | |
| "learning_rate": 8.418367346938776e-06, | |
| "loss": 0.0673, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 310, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.306122448979592, | |
| "grad_norm": 1.2954118251800537, | |
| "learning_rate": 8.36734693877551e-06, | |
| "loss": 0.0505, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 320, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.346938775510204, | |
| "grad_norm": 0.6809917092323303, | |
| "learning_rate": 8.316326530612246e-06, | |
| "loss": 0.0623, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 330, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.3877551020408163, | |
| "grad_norm": 2.596815586090088, | |
| "learning_rate": 8.26530612244898e-06, | |
| "loss": 0.0552, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 340, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 1.0378236770629883, | |
| "learning_rate": 8.214285714285714e-06, | |
| "loss": 0.049, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 350, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.469387755102041, | |
| "grad_norm": 1.7493040561676025, | |
| "learning_rate": 8.16326530612245e-06, | |
| "loss": 0.0465, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 360, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.510204081632653, | |
| "grad_norm": 1.149561882019043, | |
| "learning_rate": 8.112244897959184e-06, | |
| "loss": 0.0584, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 370, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.5510204081632653, | |
| "grad_norm": 0.8010720014572144, | |
| "learning_rate": 8.06122448979592e-06, | |
| "loss": 0.047, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 380, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.5918367346938775, | |
| "grad_norm": 0.7010307908058167, | |
| "learning_rate": 8.010204081632654e-06, | |
| "loss": 0.0649, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 390, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.6326530612244898, | |
| "grad_norm": 2.022503137588501, | |
| "learning_rate": 7.959183673469388e-06, | |
| "loss": 0.0612, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 400, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.6734693877551021, | |
| "grad_norm": 1.3006742000579834, | |
| "learning_rate": 7.908163265306124e-06, | |
| "loss": 0.0605, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 410, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 1.513334035873413, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 0.054, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 420, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.7551020408163265, | |
| "grad_norm": 0.28943702578544617, | |
| "learning_rate": 7.806122448979593e-06, | |
| "loss": 0.0673, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 430, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.7959183673469388, | |
| "grad_norm": 1.2818681001663208, | |
| "learning_rate": 7.755102040816327e-06, | |
| "loss": 0.0614, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 440, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.836734693877551, | |
| "grad_norm": 0.5026584267616272, | |
| "learning_rate": 7.704081632653061e-06, | |
| "loss": 0.0443, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 450, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.8775510204081631, | |
| "grad_norm": 0.400056391954422, | |
| "learning_rate": 7.653061224489796e-06, | |
| "loss": 0.054, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 460, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.9183673469387754, | |
| "grad_norm": 0.7661588191986084, | |
| "learning_rate": 7.602040816326531e-06, | |
| "loss": 0.0439, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 470, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 1.9591836734693877, | |
| "grad_norm": 0.3066469728946686, | |
| "learning_rate": 7.551020408163265e-06, | |
| "loss": 0.0511, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 480, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.1751477718353271, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.0644, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 490, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.0408163265306123, | |
| "grad_norm": 0.6497346758842468, | |
| "learning_rate": 7.448979591836736e-06, | |
| "loss": 0.0596, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.0816326530612246, | |
| "grad_norm": 0.585145890712738, | |
| "learning_rate": 7.39795918367347e-06, | |
| "loss": 0.0502, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 510, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.122448979591837, | |
| "grad_norm": 1.0224946737289429, | |
| "learning_rate": 7.346938775510205e-06, | |
| "loss": 0.0462, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 520, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.163265306122449, | |
| "grad_norm": 0.9922281503677368, | |
| "learning_rate": 7.295918367346939e-06, | |
| "loss": 0.063, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 530, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.204081632653061, | |
| "grad_norm": 0.7550894618034363, | |
| "learning_rate": 7.244897959183675e-06, | |
| "loss": 0.0595, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 540, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.2448979591836733, | |
| "grad_norm": 1.000552773475647, | |
| "learning_rate": 7.193877551020409e-06, | |
| "loss": 0.0645, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 550, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.7375513315200806, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 0.0597, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 560, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.326530612244898, | |
| "grad_norm": 0.7129970192909241, | |
| "learning_rate": 7.091836734693878e-06, | |
| "loss": 0.0603, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 570, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.36734693877551, | |
| "grad_norm": 0.8948765993118286, | |
| "learning_rate": 7.0408163265306125e-06, | |
| "loss": 0.0673, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 580, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.4081632653061225, | |
| "grad_norm": 0.4436047375202179, | |
| "learning_rate": 6.989795918367348e-06, | |
| "loss": 0.0547, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 590, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.4489795918367347, | |
| "grad_norm": 2.562260627746582, | |
| "learning_rate": 6.938775510204082e-06, | |
| "loss": 0.044, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 600, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.489795918367347, | |
| "grad_norm": 2.5978403091430664, | |
| "learning_rate": 6.887755102040817e-06, | |
| "loss": 0.042, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 610, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.5306122448979593, | |
| "grad_norm": 0.8350633978843689, | |
| "learning_rate": 6.836734693877551e-06, | |
| "loss": 0.0429, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 620, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 1.0908092260360718, | |
| "learning_rate": 6.785714285714287e-06, | |
| "loss": 0.0815, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 630, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.612244897959184, | |
| "grad_norm": 1.411789059638977, | |
| "learning_rate": 6.734693877551021e-06, | |
| "loss": 0.0506, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 640, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.6530612244897958, | |
| "grad_norm": 0.7262634038925171, | |
| "learning_rate": 6.683673469387756e-06, | |
| "loss": 0.0486, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 650, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.693877551020408, | |
| "grad_norm": 0.6718008518218994, | |
| "learning_rate": 6.63265306122449e-06, | |
| "loss": 0.0478, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 660, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.7346938775510203, | |
| "grad_norm": 0.6992954015731812, | |
| "learning_rate": 6.581632653061225e-06, | |
| "loss": 0.0876, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 670, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.7755102040816326, | |
| "grad_norm": 3.032949447631836, | |
| "learning_rate": 6.530612244897959e-06, | |
| "loss": 0.0497, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 680, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.816326530612245, | |
| "grad_norm": 0.544232189655304, | |
| "learning_rate": 6.4795918367346946e-06, | |
| "loss": 0.0456, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 690, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.489704430103302, | |
| "learning_rate": 6.4285714285714295e-06, | |
| "loss": 0.0438, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 700, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.8979591836734695, | |
| "grad_norm": 0.7447965741157532, | |
| "learning_rate": 6.3775510204081635e-06, | |
| "loss": 0.0557, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 710, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.938775510204082, | |
| "grad_norm": 3.607469081878662, | |
| "learning_rate": 6.326530612244899e-06, | |
| "loss": 0.059, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 720, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 2.979591836734694, | |
| "grad_norm": 0.2003553807735443, | |
| "learning_rate": 6.275510204081633e-06, | |
| "loss": 0.0349, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 730, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.020408163265306, | |
| "grad_norm": 1.135377287864685, | |
| "learning_rate": 6.224489795918368e-06, | |
| "loss": 0.0549, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 740, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.061224489795918, | |
| "grad_norm": 0.9238697290420532, | |
| "learning_rate": 6.173469387755102e-06, | |
| "loss": 0.0627, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 750, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.1020408163265305, | |
| "grad_norm": 0.7442536354064941, | |
| "learning_rate": 6.122448979591837e-06, | |
| "loss": 0.0528, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 760, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.142857142857143, | |
| "grad_norm": 0.6410558819770813, | |
| "learning_rate": 6.071428571428571e-06, | |
| "loss": 0.0707, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 770, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.183673469387755, | |
| "grad_norm": 0.4915910065174103, | |
| "learning_rate": 6.020408163265307e-06, | |
| "loss": 0.0659, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 780, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.2244897959183674, | |
| "grad_norm": 0.33948495984077454, | |
| "learning_rate": 5.969387755102042e-06, | |
| "loss": 0.0535, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 790, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.2653061224489797, | |
| "grad_norm": 0.9314869046211243, | |
| "learning_rate": 5.918367346938776e-06, | |
| "loss": 0.0443, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 800, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.306122448979592, | |
| "grad_norm": 0.9704706072807312, | |
| "learning_rate": 5.867346938775511e-06, | |
| "loss": 0.0562, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 810, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.3469387755102042, | |
| "grad_norm": 0.8564426898956299, | |
| "learning_rate": 5.816326530612246e-06, | |
| "loss": 0.0466, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 820, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.387755102040816, | |
| "grad_norm": 0.31214070320129395, | |
| "learning_rate": 5.7653061224489805e-06, | |
| "loss": 0.0488, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 830, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 0.40054649114608765, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.0536, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 840, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.4693877551020407, | |
| "grad_norm": 0.476951003074646, | |
| "learning_rate": 5.663265306122449e-06, | |
| "loss": 0.0819, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 850, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.510204081632653, | |
| "grad_norm": 1.075916051864624, | |
| "learning_rate": 5.6122448979591834e-06, | |
| "loss": 0.0451, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 860, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.5510204081632653, | |
| "grad_norm": 0.4422233998775482, | |
| "learning_rate": 5.561224489795919e-06, | |
| "loss": 0.065, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 870, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.5918367346938775, | |
| "grad_norm": 0.7247931361198425, | |
| "learning_rate": 5.510204081632653e-06, | |
| "loss": 0.0442, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 880, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.63265306122449, | |
| "grad_norm": 0.18422362208366394, | |
| "learning_rate": 5.459183673469388e-06, | |
| "loss": 0.0295, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 890, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.673469387755102, | |
| "grad_norm": 0.6566686034202576, | |
| "learning_rate": 5.408163265306123e-06, | |
| "loss": 0.0527, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 900, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.7142857142857144, | |
| "grad_norm": 0.7151392698287964, | |
| "learning_rate": 5.357142857142857e-06, | |
| "loss": 0.0614, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 910, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.7551020408163263, | |
| "grad_norm": 0.1488690972328186, | |
| "learning_rate": 5.306122448979593e-06, | |
| "loss": 0.0546, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 920, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.795918367346939, | |
| "grad_norm": 0.472126841545105, | |
| "learning_rate": 5.255102040816327e-06, | |
| "loss": 0.0514, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 930, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.836734693877551, | |
| "grad_norm": 0.8530511260032654, | |
| "learning_rate": 5.204081632653062e-06, | |
| "loss": 0.049, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 940, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.877551020408163, | |
| "grad_norm": 1.6832056045532227, | |
| "learning_rate": 5.153061224489796e-06, | |
| "loss": 0.0603, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 950, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.9183673469387754, | |
| "grad_norm": 0.30192047357559204, | |
| "learning_rate": 5.1020408163265315e-06, | |
| "loss": 0.0512, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 960, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 3.9591836734693877, | |
| "grad_norm": 1.3734880685806274, | |
| "learning_rate": 5.0510204081632655e-06, | |
| "loss": 0.0756, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 970, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.7525829672813416, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0715, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 980, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.040816326530612, | |
| "grad_norm": 0.4748665690422058, | |
| "learning_rate": 4.948979591836735e-06, | |
| "loss": 0.0487, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 990, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.081632653061225, | |
| "grad_norm": 1.340325117111206, | |
| "learning_rate": 4.897959183673469e-06, | |
| "loss": 0.0638, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1000, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.122448979591836, | |
| "grad_norm": 0.5442948937416077, | |
| "learning_rate": 4.846938775510204e-06, | |
| "loss": 0.0642, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1010, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.163265306122449, | |
| "grad_norm": 0.3119046688079834, | |
| "learning_rate": 4.795918367346939e-06, | |
| "loss": 0.0411, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1020, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.204081632653061, | |
| "grad_norm": 0.7393902540206909, | |
| "learning_rate": 4.744897959183674e-06, | |
| "loss": 0.0544, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1030, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.244897959183674, | |
| "grad_norm": 0.5412510633468628, | |
| "learning_rate": 4.693877551020409e-06, | |
| "loss": 0.0406, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1040, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.285714285714286, | |
| "grad_norm": 0.6453996300697327, | |
| "learning_rate": 4.642857142857144e-06, | |
| "loss": 0.0499, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1050, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.326530612244898, | |
| "grad_norm": 0.3400985896587372, | |
| "learning_rate": 4.591836734693878e-06, | |
| "loss": 0.046, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1060, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.36734693877551, | |
| "grad_norm": 0.5143836736679077, | |
| "learning_rate": 4.540816326530613e-06, | |
| "loss": 0.0494, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1070, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.408163265306122, | |
| "grad_norm": 0.38877835869789124, | |
| "learning_rate": 4.489795918367348e-06, | |
| "loss": 0.0526, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1080, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.448979591836735, | |
| "grad_norm": 0.38251811265945435, | |
| "learning_rate": 4.438775510204082e-06, | |
| "loss": 0.051, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1090, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.489795918367347, | |
| "grad_norm": 0.3022618889808655, | |
| "learning_rate": 4.3877551020408165e-06, | |
| "loss": 0.0368, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1100, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.530612244897959, | |
| "grad_norm": 0.12300197780132294, | |
| "learning_rate": 4.336734693877551e-06, | |
| "loss": 0.0474, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1110, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 0.7275770902633667, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 0.0409, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1120, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.612244897959184, | |
| "grad_norm": 0.46302053332328796, | |
| "learning_rate": 4.234693877551021e-06, | |
| "loss": 0.0545, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1130, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.653061224489796, | |
| "grad_norm": 1.1193764209747314, | |
| "learning_rate": 4.183673469387755e-06, | |
| "loss": 0.0736, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1140, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.6938775510204085, | |
| "grad_norm": 0.936698317527771, | |
| "learning_rate": 4.13265306122449e-06, | |
| "loss": 0.0532, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1150, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.73469387755102, | |
| "grad_norm": 1.091784119606018, | |
| "learning_rate": 4.081632653061225e-06, | |
| "loss": 0.064, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1160, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.775510204081632, | |
| "grad_norm": 0.3371049165725708, | |
| "learning_rate": 4.03061224489796e-06, | |
| "loss": 0.0557, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1170, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.816326530612245, | |
| "grad_norm": 0.5533121824264526, | |
| "learning_rate": 3.979591836734694e-06, | |
| "loss": 0.0449, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1180, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.857142857142857, | |
| "grad_norm": 1.3483092784881592, | |
| "learning_rate": 3.928571428571429e-06, | |
| "loss": 0.0551, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1190, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.8979591836734695, | |
| "grad_norm": 2.4415154457092285, | |
| "learning_rate": 3.877551020408164e-06, | |
| "loss": 0.0738, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1200, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.938775510204081, | |
| "grad_norm": 0.4990352690219879, | |
| "learning_rate": 3.826530612244898e-06, | |
| "loss": 0.0663, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1210, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 4.979591836734694, | |
| "grad_norm": 1.045630693435669, | |
| "learning_rate": 3.7755102040816327e-06, | |
| "loss": 0.0422, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1220, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.020408163265306, | |
| "grad_norm": 3.719482660293579, | |
| "learning_rate": 3.724489795918368e-06, | |
| "loss": 0.0531, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1230, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.061224489795919, | |
| "grad_norm": 0.6931941509246826, | |
| "learning_rate": 3.6734693877551024e-06, | |
| "loss": 0.0434, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1240, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.1020408163265305, | |
| "grad_norm": 0.945284903049469, | |
| "learning_rate": 3.6224489795918373e-06, | |
| "loss": 0.0377, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1250, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.142857142857143, | |
| "grad_norm": 0.49527707695961, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 0.0406, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1260, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.183673469387755, | |
| "grad_norm": 1.0614029169082642, | |
| "learning_rate": 3.5204081632653062e-06, | |
| "loss": 0.0614, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1270, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.224489795918367, | |
| "grad_norm": 1.208749771118164, | |
| "learning_rate": 3.469387755102041e-06, | |
| "loss": 0.0449, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1280, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.26530612244898, | |
| "grad_norm": 3.612487554550171, | |
| "learning_rate": 3.4183673469387756e-06, | |
| "loss": 0.0672, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1290, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.3061224489795915, | |
| "grad_norm": 0.6228938102722168, | |
| "learning_rate": 3.3673469387755105e-06, | |
| "loss": 0.0516, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1300, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.346938775510204, | |
| "grad_norm": 0.586557924747467, | |
| "learning_rate": 3.316326530612245e-06, | |
| "loss": 0.0674, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1310, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.387755102040816, | |
| "grad_norm": 0.963624119758606, | |
| "learning_rate": 3.2653061224489794e-06, | |
| "loss": 0.0621, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1320, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.428571428571429, | |
| "grad_norm": 1.1783013343811035, | |
| "learning_rate": 3.2142857142857147e-06, | |
| "loss": 0.0366, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1330, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.469387755102041, | |
| "grad_norm": 4.429933547973633, | |
| "learning_rate": 3.1632653061224496e-06, | |
| "loss": 0.0511, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1340, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.510204081632653, | |
| "grad_norm": 4.795422077178955, | |
| "learning_rate": 3.112244897959184e-06, | |
| "loss": 0.0601, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1350, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.551020408163265, | |
| "grad_norm": 0.19068406522274017, | |
| "learning_rate": 3.0612244897959185e-06, | |
| "loss": 0.0479, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1360, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.591836734693878, | |
| "grad_norm": 3.7448017597198486, | |
| "learning_rate": 3.0102040816326534e-06, | |
| "loss": 0.0404, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1370, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.63265306122449, | |
| "grad_norm": 0.3368137776851654, | |
| "learning_rate": 2.959183673469388e-06, | |
| "loss": 0.0488, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1380, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.673469387755102, | |
| "grad_norm": 0.14420035481452942, | |
| "learning_rate": 2.908163265306123e-06, | |
| "loss": 0.0582, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1390, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.714285714285714, | |
| "grad_norm": 0.372368723154068, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 0.0391, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1400, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.755102040816326, | |
| "grad_norm": 3.4565131664276123, | |
| "learning_rate": 2.8061224489795917e-06, | |
| "loss": 0.0616, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1410, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.795918367346939, | |
| "grad_norm": 3.389681339263916, | |
| "learning_rate": 2.7551020408163266e-06, | |
| "loss": 0.0675, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1420, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.836734693877551, | |
| "grad_norm": 0.7100503444671631, | |
| "learning_rate": 2.7040816326530615e-06, | |
| "loss": 0.036, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1430, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.877551020408164, | |
| "grad_norm": 0.40696802735328674, | |
| "learning_rate": 2.6530612244897964e-06, | |
| "loss": 0.0632, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1440, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.918367346938775, | |
| "grad_norm": 0.7590793967247009, | |
| "learning_rate": 2.602040816326531e-06, | |
| "loss": 0.0549, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1450, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 5.959183673469388, | |
| "grad_norm": 0.48597386479377747, | |
| "learning_rate": 2.5510204081632657e-06, | |
| "loss": 0.0393, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1460, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 2.6455276012420654, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0566, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1470, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.040816326530612, | |
| "grad_norm": 2.350471258163452, | |
| "learning_rate": 2.4489795918367347e-06, | |
| "loss": 0.0509, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1480, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.081632653061225, | |
| "grad_norm": 3.315977096557617, | |
| "learning_rate": 2.3979591836734696e-06, | |
| "loss": 0.0523, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1490, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.122448979591836, | |
| "grad_norm": 1.6327887773513794, | |
| "learning_rate": 2.3469387755102044e-06, | |
| "loss": 0.0432, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1500, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.163265306122449, | |
| "grad_norm": 5.029656410217285, | |
| "learning_rate": 2.295918367346939e-06, | |
| "loss": 0.0468, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1510, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.204081632653061, | |
| "grad_norm": 3.1543941497802734, | |
| "learning_rate": 2.244897959183674e-06, | |
| "loss": 0.0471, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1520, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.244897959183674, | |
| "grad_norm": 1.1178909540176392, | |
| "learning_rate": 2.1938775510204083e-06, | |
| "loss": 0.0538, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1530, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.285714285714286, | |
| "grad_norm": 0.7284368276596069, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 0.0474, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1540, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.326530612244898, | |
| "grad_norm": 0.15386615693569183, | |
| "learning_rate": 2.0918367346938776e-06, | |
| "loss": 0.0327, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1550, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.36734693877551, | |
| "grad_norm": 12.00415325164795, | |
| "learning_rate": 2.0408163265306125e-06, | |
| "loss": 0.0568, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1560, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.408163265306122, | |
| "grad_norm": 0.14763076603412628, | |
| "learning_rate": 1.989795918367347e-06, | |
| "loss": 0.0389, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1570, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.448979591836735, | |
| "grad_norm": 0.10665205121040344, | |
| "learning_rate": 1.938775510204082e-06, | |
| "loss": 0.0526, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1580, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.489795918367347, | |
| "grad_norm": 0.6945566534996033, | |
| "learning_rate": 1.8877551020408163e-06, | |
| "loss": 0.0276, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1590, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.530612244897959, | |
| "grad_norm": 0.6304193735122681, | |
| "learning_rate": 1.8367346938775512e-06, | |
| "loss": 0.0595, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1600, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.571428571428571, | |
| "grad_norm": 0.738591194152832, | |
| "learning_rate": 1.7857142857142859e-06, | |
| "loss": 0.0591, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1610, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.612244897959184, | |
| "grad_norm": 1.1249669790267944, | |
| "learning_rate": 1.7346938775510206e-06, | |
| "loss": 0.0444, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1620, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.653061224489796, | |
| "grad_norm": 0.3204442858695984, | |
| "learning_rate": 1.6836734693877552e-06, | |
| "loss": 0.041, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1630, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.6938775510204085, | |
| "grad_norm": 0.6603041887283325, | |
| "learning_rate": 1.6326530612244897e-06, | |
| "loss": 0.0485, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1640, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.73469387755102, | |
| "grad_norm": 0.9167451858520508, | |
| "learning_rate": 1.5816326530612248e-06, | |
| "loss": 0.051, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1650, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.775510204081632, | |
| "grad_norm": 1.1892409324645996, | |
| "learning_rate": 1.5306122448979593e-06, | |
| "loss": 0.0577, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1660, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.816326530612245, | |
| "grad_norm": 1.1679530143737793, | |
| "learning_rate": 1.479591836734694e-06, | |
| "loss": 0.0581, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1670, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.857142857142857, | |
| "grad_norm": 4.730435848236084, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.058, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1680, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.8979591836734695, | |
| "grad_norm": 2.7492659091949463, | |
| "learning_rate": 1.3775510204081633e-06, | |
| "loss": 0.0593, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1690, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.938775510204081, | |
| "grad_norm": 0.29811447858810425, | |
| "learning_rate": 1.3265306122448982e-06, | |
| "loss": 0.0425, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1700, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 6.979591836734694, | |
| "grad_norm": 0.15881459414958954, | |
| "learning_rate": 1.2755102040816329e-06, | |
| "loss": 0.0625, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1710, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.020408163265306, | |
| "grad_norm": 0.20461368560791016, | |
| "learning_rate": 1.2244897959183673e-06, | |
| "loss": 0.0581, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1720, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.061224489795919, | |
| "grad_norm": 1.1351808309555054, | |
| "learning_rate": 1.1734693877551022e-06, | |
| "loss": 0.0646, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1730, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.1020408163265305, | |
| "grad_norm": 0.1654195487499237, | |
| "learning_rate": 1.122448979591837e-06, | |
| "loss": 0.0472, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1740, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.142857142857143, | |
| "grad_norm": 0.4366483986377716, | |
| "learning_rate": 1.0714285714285714e-06, | |
| "loss": 0.0461, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1750, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.183673469387755, | |
| "grad_norm": 0.5380903482437134, | |
| "learning_rate": 1.0204081632653063e-06, | |
| "loss": 0.0506, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1760, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.224489795918367, | |
| "grad_norm": 1.661912441253662, | |
| "learning_rate": 9.69387755102041e-07, | |
| "loss": 0.0664, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1770, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.26530612244898, | |
| "grad_norm": 0.4192713499069214, | |
| "learning_rate": 9.183673469387756e-07, | |
| "loss": 0.0394, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1780, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.3061224489795915, | |
| "grad_norm": 0.6668973565101624, | |
| "learning_rate": 8.673469387755103e-07, | |
| "loss": 0.0401, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1790, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.346938775510204, | |
| "grad_norm": 0.5573325753211975, | |
| "learning_rate": 8.163265306122449e-07, | |
| "loss": 0.0526, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1800, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.387755102040816, | |
| "grad_norm": 0.39288291335105896, | |
| "learning_rate": 7.653061224489796e-07, | |
| "loss": 0.0445, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1810, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.428571428571429, | |
| "grad_norm": 0.7398673892021179, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 0.054, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1820, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.469387755102041, | |
| "grad_norm": 2.143411636352539, | |
| "learning_rate": 6.632653061224491e-07, | |
| "loss": 0.0458, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1830, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.510204081632653, | |
| "grad_norm": 0.3958425223827362, | |
| "learning_rate": 6.122448979591837e-07, | |
| "loss": 0.0641, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1840, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.551020408163265, | |
| "grad_norm": 2.797384023666382, | |
| "learning_rate": 5.612244897959184e-07, | |
| "loss": 0.0447, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1850, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.591836734693878, | |
| "grad_norm": 1.5026339292526245, | |
| "learning_rate": 5.102040816326531e-07, | |
| "loss": 0.0274, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1860, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.63265306122449, | |
| "grad_norm": 0.993212103843689, | |
| "learning_rate": 4.591836734693878e-07, | |
| "loss": 0.0393, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1870, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.673469387755102, | |
| "grad_norm": 0.16298241913318634, | |
| "learning_rate": 4.0816326530612243e-07, | |
| "loss": 0.055, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1880, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.714285714285714, | |
| "grad_norm": 4.067746639251709, | |
| "learning_rate": 3.5714285714285716e-07, | |
| "loss": 0.0661, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1890, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.755102040816326, | |
| "grad_norm": 1.387778878211975, | |
| "learning_rate": 3.0612244897959183e-07, | |
| "loss": 0.0586, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1900, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.795918367346939, | |
| "grad_norm": 0.6988309621810913, | |
| "learning_rate": 2.5510204081632656e-07, | |
| "loss": 0.0664, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1910, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.836734693877551, | |
| "grad_norm": 0.7139838933944702, | |
| "learning_rate": 2.0408163265306121e-07, | |
| "loss": 0.053, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1920, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.877551020408164, | |
| "grad_norm": 0.5550429224967957, | |
| "learning_rate": 1.5306122448979592e-07, | |
| "loss": 0.0458, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1930, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.918367346938775, | |
| "grad_norm": 1.2351597547531128, | |
| "learning_rate": 1.0204081632653061e-07, | |
| "loss": 0.0471, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1940, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 7.959183673469388, | |
| "grad_norm": 0.6292315125465393, | |
| "learning_rate": 5.1020408163265303e-08, | |
| "loss": 0.0532, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1950, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.6500349044799805, | |
| "learning_rate": 0.0, | |
| "loss": 0.0453, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1960, | |
| "total_memory_available (GB)": 94.62 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "max_memory_allocated (GB)": 57.18, | |
| "memory_allocated (GB)": 50.57, | |
| "step": 1960, | |
| "total_flos": 4.89583144415232e+16, | |
| "total_memory_available (GB)": 94.62, | |
| "train_loss": 0.057532464606421335, | |
| "train_runtime": 1666.2328, | |
| "train_samples_per_second": 52.538, | |
| "train_steps_per_second": 1.315 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1960, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.89583144415232e+16, | |
| "train_batch_size": 40, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |